

###########################
##setting up IFLS outcome##
## data####################
###########################
# Show the working directory: every path below is relative to it.
getwd()

# Respondent-level file holding the tr24-tr28 items that are recoded into
# the outcome variables further down.
ifls_14 <- dplyr::select(
  read.dta("./_3_data/ifls_attitude_data/b3a_tr.dta"),
  hhid14_9, pidlink, tr28, tr26, tr25, tr24
)

# Respondent covariates, each keyed by pidlink.
ifls_14_cov <- dplyr::select(
  read.dta("./_3_data/ifls_attitude_data/b3a_cov.dta"),
  age, pidlink
)

ifls_14_cov2 <- dplyr::select(
  read.dta("./_3_data/ifls_attitude_data/b3a_dl1.dta"),
  pidlink, dl01f, dl01g, dl01h, dl06
)

ifls_14_cov3 <- dplyr::select(
  read.dta("./_3_data/ifls_attitude_data/b3a_tk2.dta"),
  pidlink, tk25a2
)

# Household tracking file: supplies the community id (mkid14 / commid14) and
# the sc01-sc03 location codes for each household.
ifls_14_track <- dplyr::select(
  read.dta("./_3_data/ifls_attitude_data/htrack.dta"),
  mkid14, hhid14_9, sc01_14_14, sc02_14_14, sc03_14_14, commid14
)

ifls_14_comm <- dplyr::select(
  read.dta("./_3_data/ifls_attitude_data/bk1.dta"),
  tr18, tr17, tr13, tr05, commid14
)

# Start from the respondent file and attach everything else with left joins,
# so no respondent row is dropped.
analysis_df <-
  ifls_14 %>%
  left_join(ifls_14_cov, by = "pidlink") %>%
  left_join(ifls_14_cov2, by = "pidlink") %>%
  left_join(ifls_14_cov3, by = "pidlink") %>%
  left_join(ifls_14_track, by = "hhid14_9") %>%
  # TODO (from the original author): this community-level join still needs
  # finishing -- the dataset could not be downloaded at the time of writing.
  left_join(ifls_14_comm, by = "commid14")

# The intermediate tables are no longer needed once merged.
rm(ifls_14, ifls_14_cov, ifls_14_cov2, ifls_14_cov3, ifls_14_track, ifls_14_comm)


# Recode the four tr* items into binary outcomes and derive two covariates.
analysis_df <-
  analysis_df %>%
  # Work with numeric codes for the tr* items.
  mutate_at(vars(tr24, tr25, tr26, tr28), as.numeric) %>%
  mutate(
    # Each outcome is 1 when the item's code is 1 or 2 and 0 for every other
    # value. `%in%` never yields NA, so missing answers are coded 0 as well.
    outcome1 = as.numeric(tr28 %in% c(1, 2)),
    outcome2 = as.numeric(tr24 %in% c(1, 2)),
    outcome3 = as.numeric(tr25 %in% c(1, 2)),
    outcome4 = as.numeric(tr26 %in% c(1, 2)),
    # Indicator for dl01f equal to "F".
    cina = as.numeric(dl01f == "F"),
    # Indicator for the two listed dl06 education labels; stays NA when
    # dl06 is missing because `==` propagates NA.
    elem_school = as.numeric(
      dl06 == "2:Elementary school" | dl06 == "3:Junior high general"
    )
  )
         

### making gini coefficients
# One Gini coefficient per community (mkid14), computed over the tk25a2 values
# of all respondents in that community, then merged back onto analysis_df.
# NOTE(review): tk25a2 is presumably an earnings/income item -- confirm
# against the IFLS codebook.
inequality_temp <-
  analysis_df %>%
  distinct(mkid14)

# vapply over the community ids replaces the former `for (i in 1:nrow(...))`
# loop: it is safe when there are zero communities, guarantees exactly one
# numeric value per community, and avoids repeated one-row data-frame
# indexing. `which()` drops rows whose mkid14 is NA, which yields the same
# values as before because NA entries were removed by `na.rm` anyway.
inequality_temp$ineq <- vapply(
  inequality_temp$mkid14,
  function(community_id) {
    in_community <- which(analysis_df$mkid14 == community_id)
    ineq(analysis_df$tk25a2[in_community], type = "Gini", na.rm = TRUE)
  },
  numeric(1),
  USE.NAMES = FALSE
)

analysis_df <- left_join(analysis_df, inequality_temp, by = "mkid14")




###########################
##setting up IFLS coord ##
## data####################
###########################


##################################################
#original coords IFLS1 - IFLS4, and also wave 5#
################################################

# Stack the community coordinates from the original file and from the IFLS5
# (2014) file into one table with columns lat, long, MKID14 and wave.
# Everything is built inside local() so the helper and the two intermediate
# tables do not leak into the global environment.
coord <- local({
  # Paste a degrees column and a decimal-minutes column together and convert
  # the pair to decimal degrees.
  to_dec_deg <- function(degrees, minutes) {
    measurements::conv_unit(
      paste0(degrees, " ", minutes),
      from = "deg_dec_min",
      to = "dec_deg"
    )
  }

  # original coordinates
  coord_orig <-
    read.dta("./_3_data/ifls_coordinate_data/latlong.dta") %>%
    mutate_at(vars(lk07ba3, lk07bb3), as.character) %>%
    mutate(
      lat = to_dec_deg(lk07ba2, lk07ba3),
      long = to_dec_deg(lk07bb2, lk07bb3),
      # Southern-hemisphere latitudes get a leading minus sign.
      lat = case_when(
        lk07ba1 == "S" ~ paste0("-", lat),
        TRUE ~ lat
      )
    ) %>%
    dplyr::select(lat, long, MKID14 = commid00) %>%
    distinct() %>%
    # adding a zero to the end of the community identifier to conform with
    # the later coding convention
    mutate(
      MKID14 = paste0(MKID14, "0"),
      wave = "orig"
    )

  # 2014 coords -- IFLS5
  coord_five <-
    read.dta13("./_3_data/ifls_coordinate_data/ifls5_longlat.dta") %>%
    mutate_at(vars(LK07AA3, LK07AB3), as.character) %>%
    mutate(
      lat = to_dec_deg(LK07AA2, LK07AA3),
      long = to_dec_deg(LK07AB2, LK07AB3),
      lat = case_when(
        LK07AA1 == "S" ~ paste0("-", lat),
        TRUE ~ lat
      )
    ) %>%
    dplyr::select(lat, long, MKID14) %>%
    distinct() %>%
    mutate(wave = "five")

  rbind(coord_orig, coord_five)
})





################################
## binding and##################
##getting env data##############
## writing to file##############
################################

# binding
# making spatial coordinates frame
spcoord <- coord %>%
  mutate_at(vars(lat, long), as.numeric)

coordinates(spcoord) <- ~long + lat

# `extract` is called as raster::extract throughout: tidyr and magrittr both
# export an `extract` too, and whichever package is attached last would
# otherwise win.

# getting altitude data
# altitude <- getData('alt', country = "IDN")
rugged <-
  raster("./_3_data/map_data/rasters/GloSlopesCl1_5min.asc") %>%
  raster::extract(spcoord) %>%
  data.frame(spcoord@data$MKID14, .) %>%
  set_colnames(c("MKID14", "rugged"))

# Layers 5 and 12 of the WorldClim "bio" stack become `temp` and `prec`.
# NOTE(review): presumably BIO5 (stored in tenths of a degree, hence the /10)
# and BIO12 -- confirm against the WorldClim documentation.
temp_prec <- getData("worldclim", var = "bio", res = 10)
temp_prec <-
  temp_prec[[c(5, 12)]] %>%
  raster::extract(spcoord) %>%
  data.frame(spcoord@data$MKID14, .) %>%
  set_colnames(c("MKID14", "temp", "prec")) %>%
  mutate(temp = temp / 10)

soil_qual <-
  raster("./_3_data/map_data/rasters/sq1.asc") %>%
  raster::extract(spcoord) %>%
  data.frame(spcoord@data$MKID14, .) %>%
  set_colnames(c("MKID14", "soil_quality"))

soil_oxy <-
  raster("./_3_data/map_data/rasters/sq4.asc") %>%
  raster::extract(spcoord) %>%
  data.frame(spcoord@data$MKID14, .) %>%
  set_colnames(c("MKID14", "soil_oxy"))

# Each table above has exactly one row per row of `coord`, in the same order,
# because the values were extracted at the points built from `coord`. They
# are therefore attached by position. Joining on MKID14 (as was done before)
# multiplies rows whenever a community id occurs more than once in `coord`
# (e.g. in both waves) and can pair a location with another location's values.
stopifnot(
  nrow(rugged) == nrow(coord),
  nrow(temp_prec) == nrow(coord),
  nrow(soil_qual) == nrow(coord),
  nrow(soil_oxy) == nrow(coord)
)

coord$rugged <- rugged$rugged
coord$temp <- temp_prec$temp
coord$prec <- temp_prec$prec
coord$soil_quality <- soil_qual$soil_quality
coord$soil_oxy <- soil_oxy$soil_oxy

# NOTE TO REPLICATION FILE -- FIX OUTPUT DIRECTORY FOR THIS FILE
write.csv(coord, "./_3_data/ifls_coordinate_data/rendered_coordinates.csv")



################################
## calculating comm distance####
## to opium boundary############
################################

# area
banned_areas <- readShapePoly("./_3_data/map_data/opium_maps/opium_banned_1886.shp")
# boundary
boundary <- readShapeLines("./_3_data/map_data/opium_maps/opium_boundary_1886.shp")

spcoord <- coord %>%
  mutate_at(vars(lat, long), as.numeric)

coordinates(spcoord) <- ~long + lat

# finding communities in banned area
# over() returns one row per point; `id` is NA for points that fall outside
# every polygon, so it is turned into a 0/1 flag. `rowid` is the point's row
# number and is the key used to attach the flag back onto `coord`.
banned <-
  over(spcoord, banned_areas) %>%
  mutate(rowid = rownames(.),
         rowid = as.numeric(rowid)) %>%
  mutate(id = case_when(is.na(id) ~ 0,
                        TRUE ~ 1)) %>%
  set_colnames(c("opium_ban", "rowid"))

coord <-
  coord %>%
  rowid_to_column() %>%
  left_join(banned, by = "rowid")

# Keep numeric coordinates and drop rows whose longitude is not below 360.
coord <-
  coord %>%
  mutate_at(vars(lat, long), as.numeric) %>%
  filter(long < 360)

# finding distance to opium boundary
# dist2Line() accepts a whole longitude/latitude matrix, so it is called once
# for all communities instead of three times per row. Its result columns are
# (distance, lon, lat) of the nearest point on the boundary.
nearest_boundary <- dist2Line(as.matrix(coord[, c("long", "lat")]), boundary)

coord$dist <- nearest_boundary[, 1]
coord$border_lat <- nearest_boundary[, 3]
coord$border_long <- nearest_boundary[, 2]



################################
## calculating comm distance####
## to placebo salt boundary#####
################################

# area
salt_prohibit_areas <- readShapePoly("./_3_data/map_data/salt_maps/clipped_salt_monopoly_prohibit_map.shp")
# boundary
salt_boundary <- readShapeLines("./_3_data/map_data/salt_maps/boundary_2.shp", repair = TRUE)

spcoord_salt <- coord %>%
  mutate_at(vars(lat, long), as.numeric)

coordinates(spcoord_salt) <- ~long + lat

# finding communities in the salt-prohibited area
# over() returns one row per point, in point order. The join key is taken
# from the `rowid` column already carried by `coord` rather than rebuilt from
# row names: rows may have been filtered out of `coord` since `rowid` was
# assigned, in which case fresh row numbers would no longer line up with it.
salt_banned <-
  over(spcoord_salt, salt_prohibit_areas) %>%
  mutate(rowid = spcoord_salt@data$rowid,
         # GID_0 is NA for points that fall outside every polygon
         salt_banned = case_when(is.na(GID_0) ~ 0,
                                 TRUE ~ 1)) %>%
  dplyr::select(rowid, salt_banned)

coord <-
  coord %>%
  left_join(salt_banned, by = "rowid")

coord <-
  coord %>%
  mutate_at(vars(lat, long), as.numeric) %>%
  filter(long < 360)

# finding distance to the placebo salt boundary
# dist2Line() is called once for all communities. Its result columns are
# (distance, lon, lat), so longitude is column 2 and latitude is column 3;
# the previous code had these two swapped for the salt boundary.
nearest_salt_boundary <- dist2Line(
  as.matrix(coord[, c("long", "lat")]),
  salt_boundary
)

coord$salt_dist <- nearest_salt_boundary[, 1]
coord$salt_border_lat <- nearest_salt_boundary[, 3]
coord$salt_border_long <- nearest_salt_boundary[, 2]





# merging data
# Attach the community-level spatial variables to every respondent and build
# the signed-distance ("forcing") variables: the distance to a boundary, made
# negative on one side of it.
# NOTE(review): if `coord` holds more than one row per MKID14 (e.g. one per
# wave), this join repeats respondents -- confirm that this is intended.
analysis_df_individual_level <-
  analysis_df %>%
  left_join(coord, by = c("mkid14" = "MKID14")) %>%
  mutate(
    # opium_legal is the complement of opium_ban (NA stays NA).
    opium_legal = case_when(opium_ban == 1 ~ 0,
                            opium_ban == 0 ~ 1,
                            TRUE ~ NA_real_),
    # Negative distance where opium was banned, positive otherwise.
    forcing = case_when(opium_legal == 0 ~ -dist,
                        TRUE ~ dist),
    # Negative salt-boundary distance outside the salt-prohibited area.
    # The fallback is salt_dist: it previously used `dist`, the distance to
    # the *opium* boundary, which mixed the two boundaries in one variable.
    salt_banned_forcing = case_when(salt_banned == 0 ~ -salt_dist,
                                    TRUE ~ salt_dist),
    salt_mono_legal = case_when(salt_banned == 1 ~ 0,
                                salt_banned == 0 ~ 1,
                                TRUE ~ NA_real_),
    salt_mono_legal_forcing = case_when(salt_mono_legal == 0 ~ -salt_dist,
                                        TRUE ~ salt_dist)
  )


# Community-level table: every row of `coord` with the mean of each outcome
# among that community's respondents, the respondent count, and the signed
# distance to the opium boundary.
analysis_df_cluster_level <-
  coord %>%
  left_join(
    analysis_df %>%
      group_by(
        mkid14,
        prov_code = sc01_14_14,
        kab_code = sc02_14_14,
        kec_code = sc03_14_14
      ) %>%
      # Overwrite each outcome with its within-group mean ...
      mutate_at(
        vars(outcome1, outcome2, outcome3, outcome4),
        mean,
        na.rm = TRUE
      ) %>%
      dplyr::select(mkid14, outcome1, outcome2, outcome3, outcome4) %>%
      mutate(
        num_resp = n(),
        weight = num_resp / 32344
      ) %>%
      # ... then collapse the now-identical rows to one per group.
      distinct(),
    by = c("MKID14" = "mkid14")
  ) %>%
  mutate(
    opium_legal = case_when(
      opium_ban == 1 ~ 0,
      opium_ban == 0 ~ 1,
      TRUE ~ NA_real_
    ),
    forcing = case_when(
      opium_legal == 0 ~ -dist,
      TRUE ~ dist
    ),
    # this makes the bins for the plot later
    dist_groups = cut(
      forcing,
      breaks = seq(-15000, 15000, by = 1500),
      dig.lab = 6
    )
  )

# Add, for every distance bin, the respondent-weighted mean of each outcome
# (columns suffixed "_weighted_bins") and the bin's midpoint.
analysis_df_cluster_level <-
  analysis_df_cluster_level %>%
  left_join(
    analysis_df_cluster_level %>%
      filter(!is.na(dist_groups)) %>%
      group_by(dist_groups) %>%
      summarise_at(
        vars(starts_with("outcome")),
        ~ weighted.mean(., num_resp, na.rm = TRUE)
      ) %>%
      rename_at(
        vars(starts_with("outcome")),
        function(col_name) paste0(col_name, "_weighted_bins")
      ),
    by = "dist_groups"
  ) %>%
  # Strip the interval brackets from the bin label, split it at the comma
  # into its two endpoints, and take their average as the bin position.
  mutate(
    dist_groups_split = str_replace_all(dist_groups, "\\(|\\)|\\[|\\]", "")
  ) %>%
  separate(dist_groups_split, into = c("start_bin", "end_bin"), sep = ",") %>%
  mutate_at(vars(start_bin, end_bin), as.numeric) %>%
  mutate(bin_position = (start_bin + end_bin) / 2)
  


# Full analysis files, including community coordinates.
write.csv(analysis_df_individual_level, "./_3_data/_cleaned_data/analysis_df_individual_level.csv")
write.csv(analysis_df_cluster_level, "./_3_data/_cleaned_data/analysis_df_cluster_level.csv")


## removing the lat/long identifiers for privacy concerns
analysis_df_individual_level_depersonalized <-
  analysis_df_individual_level %>%
  dplyr::select(-c(lat, long))
analysis_df_cluster_level_depersonalized <-
  analysis_df_cluster_level %>%
  dplyr::select(-c(lat, long))

# Write the depersonalized tables. The previous code rebuilt them but then
# wrote the *full* tables a second time, so the versions without lat/long
# never reached disk. They go to their own files so that anything reading
# the full files above keeps working.
# NOTE(review): if only the depersonalized data may be distributed, point
# these two calls at the paths above (or drop the first two writes) instead.
# Only lat/long are removed here; border_lat/border_long and the salt_*
# equivalents are still present -- confirm whether they should go as well.
write.csv(
  analysis_df_individual_level_depersonalized,
  "./_3_data/_cleaned_data/analysis_df_individual_level_depersonalized.csv"
)
write.csv(
  analysis_df_cluster_level_depersonalized,
  "./_3_data/_cleaned_data/analysis_df_cluster_level_depersonalized.csv"
)


